

# Get vehicle registration file names and paths.
#
# Builds Files_reg, one row per registration snapshot found in the working
# directory, with columns:
#   path  - file name of the snapshot
#   date  - snapshot date parsed from the file name
#   order - position of the snapshot in chronological order
# NOTE(review): str_match() and dmy() are assumed to be attached (stringr and
# lubridate) before this point - confirm against the start of the script.

# The dot before "RData" is escaped and the pattern is anchored at the end of
# the name, so only files that really end in ".RData" are picked up.
filenames_reg <- list.files(pattern = "Gekentekende_voertuigen.*\\.RData$")
filedate_reg <- str_match(
  filenames_reg,
  "Gekentekende_voertuigen_(.*?)\\.RData$"
)[, 2]

# Rewrite the Dutch month spellings below to their English equivalents before
# the date is parsed with dmy(). The replacements are applied in this order.
month_map <- c(
  "[Jj]anuari"  = "january",
  "[Ff]ebruari" = "february",
  "[Mm]aart"    = "march",
  "[Mm]ei"      = "may",
  "[Jj]uni"     = "june",
  "[Jj]uli"     = "july",
  "[Aa]ugustus" = "august",
  "[Oo]ktober"  = "october"
)
for (month_pattern in names(month_map)) {
  filedate_reg <- gsub(month_pattern, month_map[[month_pattern]], filedate_reg)
}

Files_reg <- data.frame(
  path = filenames_reg,
  date = as.Date(dmy(filedate_reg)),
  stringsAsFactors = FALSE
)

# Sort chronologically (unparseable dates, i.e. NA, end up last) and record
# each file's position in that ordering.
Files_reg <- Files_reg[order(Files_reg$date), ]
Files_reg$order <- seq_len(nrow(Files_reg))
rm(filedate_reg, filenames_reg, month_map, month_pattern)

# Keep only snapshots dated on or before the cut-off; rows whose date could
# not be parsed (NA) are dropped by subset() as well.
Files_reg <- subset(Files_reg, date <= as.Date("2019-04-02"))

# Build Link_Char: one row of vehicle characteristics per kenteken.
#
# Every registration snapshot listed in Files_reg is cleaned in the same way
# by read_link_char_rows(). The first snapshot seeds Link_Char; each later
# snapshot only contributes rows whose kenteken is not in Link_Char yet.
# The result is saved to "Link_Char.RData".

# Read one snapshot and return its cleaned rows.
#
# path: path to an .RData file containing a data frame named `x`.
# Returns a data frame restricted to four-wheeled passenger cars of the
# retained body types, with lower-case, dot-free column names, factor columns
# converted to character, and only the columns that are kept in Link_Char.
# Stops with an error when the file does not contain an object named `x`.
read_link_char_rows <- function(path) {
  # Load into a private environment so that the snapshot's objects neither
  # leak into the global workspace nor get confused with a stale global `x`.
  snapshot <- new.env(parent = emptyenv())
  load(path, envir = snapshot)
  if (!exists("x", envir = snapshot, inherits = FALSE)) {
    stop("File '", path, "' does not contain an object named 'x'",
         call. = FALSE)
  }

  # Keep only passenger cars, only certain body types (the largest groups),
  # and only cars with exactly 4 wheels. Rows for which the condition is NA
  # are dropped by subset(), as they were when the filters ran one by one.
  cars <- subset(
    snapshot$x,
    Voertuigsoort == "Personenauto" &
      Inrichting %in% c("cabriolet", "coupe", "hatchback", "MPV",
                        "Niet geregistreerd", "sedan", "stationwagen") &
      Aantal.wielen == 4
  )
  rm(snapshot) # release the unfiltered snapshot as early as possible

  # Drop dots from variable names and make them all lower case
  names(cars) <- tolower(gsub("\\.", "", names(cars)))

  # Variables to keep
  cars <- subset(
    cars,
    select = c(kenteken, merk, handelsbenaming, brutobpm, inrichting,
               aantalzitplaatsen, aantalcilinders, cilinderinhoud,
               massaledigvoertuig, toegestanemaximummassavoertuig,
               massarijklaar, zuinigheidslabel, catalogusprijs, aantaldeuren,
               typegoedkeuringsnummer, variant, uitvoering)
  )

  # Transform factors into character (only the kept columns need converting)
  is_factor <- vapply(cars, is.factor, logical(1))
  cars[is_factor] <- lapply(cars[is_factor], as.character)

  cars
}

# At most this many snapshots are combined (previously the literal range 2:18).
max_files <- 18L
n_files <- min(max_files, nrow(Files_reg))
if (n_files < 1L) {
  stop("No vehicle registration files to process", call. = FALSE)
}
if (n_files < max_files) {
  warning("Only ", n_files, " registration file(s) available; expected ",
          max_files, call. = FALSE)
}

# Load starting file (first)
Link_Char <- read_link_char_rows(Files_reg$path[1])

# seq_len(n)[-1] is empty when there is a single file, unlike 2:n
for (j in seq_len(n_files)[-1]) {
  new_rows <- read_link_char_rows(Files_reg$path[j])

  # Only add vehicles that were not seen in an earlier snapshot
  is_new <- !(new_rows$kenteken %in% Link_Char$kenteken)
  new_rows <- new_rows[is_new, , drop = FALSE]

  Link_Char <- unique(rbind(Link_Char, new_rows))
  rm(new_rows, is_new)
}

save(Link_Char, file = "Link_Char.RData")